# 
#  model2.r
#  
# This file replicates results for the main model with t-distributed random effects 
# It: (1) prepares the data (centering, z-std.), (2) runs JAGS, 
# (3) calculates posterior summaries and quantities of interest
# 
# Note: the corresponding JAGS model file is "model2.bug"
# 



rm(list=ls())

# Load Libs
library(foreign)
library(rjags)
library(coda)
# Load helper functions
source("functions.R")



# =========================================================================
# =                         Data                                  		  =
# =========================================================================

dat <- read.dta("data1samp.dta", convert.factors = FALSE)

## partental background dummies
dat$zg1 <- ifelse(dat$zgold==1, 1, 0)
dat$zg2 <- ifelse(dat$zgold==2, 1, 0)
dat$zg3 <- ifelse(dat$zgold==3, 1, 0)
dat$zg4 <- ifelse(dat$zgold==4, 1, 0)
## standardize continuous vars
dat$age <- zstd(dat$age, gelman=TRUE)
dat$nkids <- zstd(dat$nkids, gelman=TRUE)
dat$hhsize <- zstd(dat$hhsize, gelman=TRUE)
dat$ownocc <- zstd(dat$ownocc, gelman=TRUE)
dat$hsequi <- zstd(dat$hsequi, gelman=TRUE)
dat$incshr <- zstd(dat$incshr, gelman=TRUE)
dat$perminc <- zstd(dat$perminc, gelman=TRUE)
dat$transinc <- zstd(dat$transinc, gelman=TRUE)
## center dummy vars
dat$div <- center(dat$div)
dat$uempl <- center(dat$uempl)
dat$union <- center(dat$union)
dat$fem <- center(dat$fem)
dat$nonwhite <- center(dat$nonwhite)
dat$degree <- center(dat$degree)
dat$alvl <- center(dat$alvl)
dat$olvl <- center(dat$olvl)
dat$zg1 <- center(dat$zg1)
dat$zg2 <- center(dat$zg2)
dat$zg3 <- center(dat$zg3)
dat$zg4 <- center(dat$zg4)
dat$london <- center(dat$london)

## reshape data:
## time varying variables
y <- as.matrix(reshape(subset(dat, select=c(y,pid,waven)), 
	timevar="waven", idvar="pid", direction="wide")[,-1])
age <- as.matrix(reshape(subset(dat, select=c(age,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
nkids <- as.matrix(reshape(subset(dat, select=c(nkids,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
hhsize <- as.matrix(reshape(subset(dat, select=c(hhsize,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
ownocc <- as.matrix(reshape(subset(dat, select=c(ownocc,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
hsequi <- as.matrix(reshape(subset(dat, select=c(hsequi,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
incshr <- as.matrix(reshape(subset(dat, select=c(incshr,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
perminc <- as.matrix(reshape(subset(dat, select=c(perminc,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
transinc <- as.matrix(reshape(subset(dat, select=c(transinc,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
div <- as.matrix(reshape(subset(dat, select=c(div,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
uempl <- as.matrix(reshape(subset(dat, select=c(uempl,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
union <- as.matrix(reshape(subset(dat, select=c(union,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])

# Time constant variables
fem <- reshape(subset(dat, select=c(female,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
nw <- reshape(subset(dat, select=c(nonwhite,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
degree <- reshape(subset(dat, select=c(degree,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
alvl <- reshape(subset(dat, select=c(alvl,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
olvl <- reshape(subset(dat, select=c(olvl,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg1 <- reshape(subset(dat, select=c(zg1,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg2 <- reshape(subset(dat, select=c(zg2,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg3 <- reshape(subset(dat, select=c(zg3,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg4 <- reshape(subset(dat, select=c(zg4,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
lond <- reshape(subset(dat, select=c(london,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]


N <- dim(y)[1]


jags.data <- gen.list(c("y", "age", "nkids", "hhsize", "ownocc", "hsequi", "incshr", "perminc", "transinc", "div", "uempl", "fem", "nw", "degree", "alvl", "olvl", "zg1", "zg2", "zg3", "zg4", "union", "lond", "N"))



# ====================================================================
# =                    get MCMC samples                              =
# ====================================================================

load.module("glm")

# cutpoint initial values
z <- jags.data$y
z[z==0] <- -0.5
z[z==1] <- 0.5
z[z==2] <- 1.5
## coefficient initial values
temp1 <- subset(dat, dat$waven==1)
temp2 <- subset(dat, dat$waven>1)
coefb <- coef(MASS::polr(as.factor(y)~age+nkids+hhsize+ownocc+hsequi+incshr+perminc+transinc+div+
	uempl+female+nonwhite+degree+alvl+olvl+union, data=temp2))
coefd <- coef(MASS::polr(as.factor(y)~age+nkids+hhsize+ownocc+hsequi+incshr+perminc+transinc+div+
	uempl+female+nonwhite+degree+alvl+olvl+union+zg1+zg2+zg3+zg4+london,data=temp1))

## create list of initial values
init <- list(
	list(z=z, gamma=.224, scale=1.156, sigma=.941, beta=coefb, delta=coefd, cut.delta=0.81272,
		 "RNG.seed"=12345),
	list(z=z, gamma=.224, scale=1.156, sigma=.941, beta=coefb, delta=coefd, cut.delta=0.81272,
		"RNG.seed"=54321))

## set up model
m2 <- jags.model(file="model2.bug", data=jags.data, n.chains=2, n.adapt=2e3, init=init)
update(m2, 2e4)

## sample...
mon <- c("alpha", "beta", "gamma", "delta", "sigma", "scale","cut[2]", 
	"ARerr", "TMatch", "ppp.obs", "ppp.pred")
m2.out <- coda.samples(m2, variable.names=mon, n.iter=2e4*25, thin=25)

## save samples
save(m2.out, file="model2_samples.Rdata")




# ====================================================================
# =                       Analyses                                   =
# ====================================================================

## Convergence diagnostics

## Gelman-Rubin diag (excludes post. pred. check values)
gelman.diag(mcmc.list(m2.out[[1]][,c(4:43,50,51)], m2.out[[2]][,c(4:43,50,51)]))
## combine samples from 2 chains
m2.out <- runjags::combine.mcmc(list(m2.out[[1]],m2.out[[2]]))
## Geweke diagnostics
geweke.diag(m2.out)



# ==============================================
# =     Posterior summary   
# ==============================================

sum.coda(m2.out)

# Individual effect variance (samples are for SD)
sum.coda(as.matrix(m2.out[,51]^2))
















